Without considering the binary output and categorical variables in the data set, compare the following clustering technique results:
# import libraries
##################
# Helper packages
library(dplyr) # for data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # for data visualization
library(stringr) # for string functionality
library(gridExtra) # for manipulating the grid
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Modeling packages
library(tidyverse) # data manipulation
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ purrr 0.3.5
## ✔ tidyr 1.1.4 ✔ forcats 0.5.1
## ✔ readr 2.1.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(cluster) # for general clustering algorithms
library(factoextra) # for visualizing cluster results
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Modeling packages
library(mclust) # for fitting clustering algorithms
## Package 'mclust' version 6.0.0
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
##
## The following object is masked from 'package:purrr':
##
## map
# load essential data frame
df_m3 <- read.csv('./radiomics_completedata.csv')
# keep numeric columns only (this drops the categorical columns);
# vapply() guarantees a logical vector even for a zero-column data frame
i1_m3 <- vapply(df_m3, is.numeric, logical(1))
df_m3 <- df_m3[i1_m3]
# drop binary (0/1) columns; missing values are ignored when deciding whether
# a column is binary, so a 0/1 column containing NA is removed as well instead
# of surviving the filter and causing rows to be discarded below
df_m3 <- Filter(function(x) !all(is.na(x) | x %in% c(0, 1)), df_m3)
# remove every row that still contains a missing value
df_m3 <- na.omit(df_m3)
# standardise each column (centre and scale) and keep the result
# as a data frame
final_m3 <- as.data.frame(scale(df_m3))
Start the Clustering process for K-Means
# Determining Optimal Number of Clusters
set.seed(123)
# Total within-cluster sum of squares of a k-means fit.
#   k    : number of clusters to fit
#   data : numeric data to cluster; defaults to the scaled data frame
#          final_m3 (looked up only when the function is called)
# Returns the tot.withinss component of the kmeans() result, using
# 10 random starts.
wss <- function(k, data = final_m3) {
  kmeans(data, centers = k, nstart = 10)$tot.withinss
}
# Candidate numbers of clusters: k = 1 to k = 15
k.values <- 1:15
# total within-cluster sum of squares for each candidate k (1-15 clusters)
wss_values <- vapply(k.values, wss, numeric(1))
# elbow plot of the within-cluster sum of squares against k
plot(
  k.values, wss_values,
  xlab = "Number of clusters K",
  ylab = "Total within-clusters sum of squares",
  type = "b", pch = 19, frame = FALSE
)
# alternatively, let factoextra pick k with the silhouette method
fviz_nbclust(x = final_m3, FUNcluster = kmeans, method = "silhouette")
# Gap statistic for k-means with up to 10 clusters, 50 simulated
# reference sets and 25 random starts per fit; seeded for reproducibility
set.seed(123)
gap_stat <- clusGap(
  x = final_m3,
  FUNcluster = kmeans,
  K.max = 10,
  B = 50,
  nstart = 25
)
# Show the gap table and the number of clusters chosen by "firstmax"
print(gap_stat, method = "firstmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = final_m3, FUNcluster = kmeans, K.max = 10, B = 50, nstart = 25)
## B=50 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstmax'): 10
## logW E.logW gap SE.sim
## [1,] 7.171204 7.635853 0.4646496 0.009379996
## [2,] 6.879524 7.546674 0.6671493 0.008786338
## [3,] 6.798848 7.500436 0.7015873 0.007082545
## [4,] 6.760004 7.466467 0.7064633 0.006632270
## [5,] 6.715614 7.441579 0.7259645 0.006374244
## [6,] 6.689522 7.419633 0.7301115 0.006603869
## [7,] 6.661683 7.399745 0.7380616 0.006654018
## [8,] 6.643211 7.381624 0.7384134 0.006480643
## [9,] 6.616471 7.365139 0.7486677 0.006484664
## [10,] 6.588968 7.349544 0.7605765 0.006453097
# plot the gap statistic computed above
fviz_gap_stat(gap_stat)
# Fit the final k-means model with 2 clusters (25 random starts, seeded)
set.seed(123)
final <- kmeans(x = final_m3, centers = 2, nstart = 25)
print(final)
## K-means clustering with 2 clusters of sizes 50, 147
##
## Cluster means:
## Failure Entropy_cooc.W.ADC GLNU_align.H.PET Min_hist.PET Max_hist.PET
## 1 -0.0014733768 0.04845450 -0.07901100 0.9204612 0.9468341
## 2 0.0005011486 -0.01648112 0.02687449 -0.3130820 -0.3220524
## Mean_hist.PET Variance_hist.PET Standard_Deviation_hist.PET Skewness_hist.PET
## 1 0.9216792 0.4594337 0.9319222 0.9115602
## 2 -0.3134963 -0.1562700 -0.3169804 -0.3100545
## Kurtosis_hist.PET Energy_hist.PET Entropy_hist.PET AUC_hist.PET H_suv.PET
## 1 0.25274217 0.6864958 1.5003007 1.6957546 0.9652219
## 2 -0.08596673 -0.2335020 -0.5103064 -0.5767873 -0.3283068
## Volume.PET X3D_surface.PET ratio_3ds_vol.PET ratio_3ds_vol_norm.PET
## 1 0.5900077 0.3802612 0.9436984 0.9622506
## 2 -0.2006829 -0.1293406 -0.3209858 -0.3272961
## irregularity.PET tumor_length.PET Compactness_v1.PET Compactness_v2.PET
## 1 1.6522842 1.0256292 0.8807232 0.4324058
## 2 -0.5620014 -0.3488535 -0.2995657 -0.1470768
## Spherical_disproportion.PET Sphericity.PET Asphericity.PET Center_of_mass.PET
## 1 0.9622506 0.4460709 0.9240341 0.6358358
## 2 -0.3272961 -0.1517248 -0.3142973 -0.2162707
## Max_3D_diam.PET Major_axis_length.PET Minor_axis_length.PET
## 1 0.8259982 0.8904297 1.1433164
## 2 -0.2809518 -0.3028672 -0.3888831
## Least_axis_length.PET Elongation.PET Flatness.PET Max_cooc.L.PET
## 1 0.9772289 1.4563692 1.3553445 0.7290795
## 2 -0.3323908 -0.4953637 -0.4610015 -0.2479862
## Average_cooc.L.PET Variance_cooc.L.PET Entropy_cooc.L.PET DAVE_cooc.L.PET
## 1 1.389215 1.1041050 1.6813985 1.2936781
## 2 -0.472522 -0.3755459 -0.5719043 -0.4400266
## DVAR_cooc.L.PET DENT_cooc.L.PET SAVE_cooc.L.PET SVAR_cooc.L.PET
## 1 1.1366603 1.6603800 1.3889879 1.1209781
## 2 -0.3866192 -0.5647551 -0.4724449 -0.3812851
## SENT_cooc.L.PET ASM_cooc.L.PET Contrast_cooc.L.PET Dissimilarity_cooc.L.PET
## 1 1.6614758 0.6775498 0.9285775 1.2936781
## 2 -0.5651278 -0.2304591 -0.3158427 -0.4400266
## Inv_diff_cooc.L.PET Inv_diff_norm_cooc.L.PET IDM_cooc.L.PET
## 1 1.443028 1.6979660 1.2814891
## 2 -0.490826 -0.5775395 -0.4358807
## IDM_norm_cooc.L.PET Inv_var_cooc.L.PET Correlation_cooc.L.PET
## 1 1.7046571 1.2896785 1.123648
## 2 -0.5798153 -0.4386661 -0.382193
## Autocorrelation_cooc.L.PET Tendency_cooc.L.PET Shade_cooc.L.PET
## 1 1.0338012 1.1209781 0.5578271
## 2 -0.3516331 -0.3812851 -0.1897371
## Prominence_cooc.L.PET IC1_.L.PET IC2_.L.PET Coarseness_vdif_.L.PET
## 1 0.7889007 -0.6341334 1.5273752 0.7537450
## 2 -0.2683336 0.2156916 -0.5195154 -0.2563758
## Contrast_vdif_.L.PET Busyness_vdif_.L.PET Complexity_vdif_.L.PET
## 1 0.3878173 0.5565230 1.2153015
## 2 -0.1319107 -0.1892936 -0.4133678
## Strength_vdif_.L.PET SRE_align.L.PET LRE_align.L.PET GLNU_align.L.PET
## 1 0.4934069 1.706523 1.6948229 0.4587983
## 2 -0.1678255 -0.580450 -0.5764704 -0.1560539
## RLNU_align.L.PET RP_align.L.PET LGRE_align.L.PET HGRE_align.L.PET
## 1 0.4189336 1.7061400 1.0408063 1.0700373
## 2 -0.1424944 -0.5803197 -0.3540158 -0.3639583
## LGSRE_align.L.PET HGSRE_align.L.PET LGHRE_align.L.PET HGLRE_align.L.PET
## 1 1.048281 1.0672364 1.0052958 1.078233
## 2 -0.356558 -0.3630056 -0.3419373 -0.366746
## GLNU_norm_align.L.PET RLNU_norm_align.L.PET GLVAR_align.L.PET
## 1 1.1041018 1.7034139 1.1510468
## 2 -0.3755448 -0.5793925 -0.3915125
## RLVAR_align.L.PET Entropy_align.L.PET SZSE.L.PET LZSE.L.PET LGLZE.L.PET
## 1 1.0474522 1.6880661 1.6676802 1.1852630 1.0601400
## 2 -0.3562762 -0.5741722 -0.5672382 -0.4031507 -0.3605919
## HGLZE.L.PET SZLGE.L.PET SZHGE.L.PET LZLGE.L.PET LZHGE.L.PET GLNU_area.L.PET
## 1 1.0866745 1.0735299 1.0776043 0.8457163 0.8914749 0.4621309
## 2 -0.3696172 -0.3651462 -0.3665321 -0.2876586 -0.3032228 -0.1571874
## ZSNU.L.PET ZSP.L.PET GLNU_norm.L.PET ZSNU_norm.L.PET GLVAR_area.L.PET
## 1 0.4218710 1.679008 1.1042309 1.681848 1.1694826
## 2 -0.1434935 -0.571091 -0.3755887 -0.572057 -0.3977832
## ZSVAR.L.PET Entropy_area.L.PET Max_cooc.H.PET Average_cooc.H.PET
## 1 0.7548095 1.6893793 0.5052232 1.6652563
## 2 -0.2567379 -0.5746188 -0.1718446 -0.5664137
## Variance_cooc.H.PET Entropy_cooc.H.PET DAVE_cooc.H.PET DVAR_cooc.H.PET
## 1 1.4721984 1.4404122 1.5079528 1.4645709
## 2 -0.5007478 -0.4899361 -0.5129091 -0.4981534
## DENT_cooc.H.PET SAVE_cooc.H.PET SVAR_cooc.H.PET SENT_cooc.H.PET
## 1 1.3368883 1.6782221 1.4484331 1.1582831
## 2 -0.4547239 -0.5708239 -0.4926643 -0.3939739
## ASM_cooc.H.PET Contrast_cooc.H.PET Dissimilarity_cooc.H.PET
## 1 0.4701159 1.344935 1.5079528
## 2 -0.1599034 -0.457461 -0.5129091
## Inv_diff_cooc.H.PET Inv_diff_norm_cooc.H.PET IDM_cooc.H.PET
## 1 1.1377441 1.6996628 0.9576980
## 2 -0.3869878 -0.5781166 -0.3257476
## IDM_norm_cooc.H.PET Inv_var_cooc_.H.PET Correlation_cooc.H.PET
## 1 1.7052806 0.9554037 1.1365587
## 2 -0.5800274 -0.3249672 -0.3865846
## Autocorrelation_cooc.H.PET Tendency_cooc.H.PET Shade_cooc.H.PET
## 1 1.5649714 1.4092944 -0.7124616
## 2 -0.5323032 -0.4793518 0.2423339
## Prominence_cooc.H.PET IC1_d.H.PET IC2_d.H.PET Coarseness_vdif.H.PET
## 1 1.0427158 -0.23095606 1.3345708 0.6663547
## 2 -0.3546653 0.07855648 -0.4539356 -0.2266512
## Contrast_vdif.H.PET Busyness_vdif.H.PET Complexity_vdif.H.PET
## 1 0.4860224 0.25301766 1.0958360
## 2 -0.1653138 -0.08606043 -0.3727333
## Strength_vdif.H.PET SRE_align.H.PET LRE_align.H.PET RLNU_align.H.PET
## 1 0.03112072 1.6638495 1.0890098 0.4166644
## 2 -0.01058528 -0.5659352 -0.3704115 -0.1417226
## RP_align.H.PET LGRE_align.H.PET HGRE_align.H.PET LGSRE_align.H.PET
## 1 1.6436641 0.7082866 1.5743684 0.7040204
## 2 -0.5590694 -0.2409138 -0.5354994 -0.2394627
## HGSRE_align.H.PET LGHRE_align.H.PET HGLRE_align.H.PET GLNU_norm_align.H.PET
## 1 1.6533952 0.7311054 0.7453460 0.8572435
## 2 -0.5623793 -0.2486753 -0.2535191 -0.2915794
## RLNU_norm_align.H.PET GLVAR_align.H.PET RLVAR_align.H.PET Entropy_align.H.PET
## 1 1.5584253 1.4161797 0.4776867 1.550297
## 2 -0.5300766 -0.4816938 -0.1624785 -0.527312
## SZSE.H.PET LZSE.H.PET LGLZE.H.PET HGLZE.H.PET SZLGE.H.PET SZHGE.H.PET
## 1 1.4671263 -0.09759617 0.7096710 1.4890573 0.6984264 1.4294579
## 2 -0.4990226 0.03319598 -0.2413847 -0.5064821 -0.2375600 -0.4862102
## LZLGE.H.PET LZHGE.H.PET GLNU_area.H.PET ZSNU.H.PET ZSP.H.PET
## 1 0.001044652 -0.08592571 0.4835029 0.3648643 1.1565208
## 2 -0.000355324 0.02922643 -0.1644568 -0.1241035 -0.3933744
## GLNU_norm.H.PET ZSNU_norm.H.PET GLVAR_area.H.PET ZSVAR_H.PET
## 1 0.8791603 1.2441418 1.3802703 -0.09449223
## 2 -0.2990341 -0.4231775 -0.4694797 0.03214021
## Entropy_area.H.PET Max_cooc.W.PET Average_cooc.W.PET Variance_cooc.W.PET
## 1 1.6279234 0.5502762 0.9151412 0.4579807
## 2 -0.5537154 -0.1871688 -0.3112725 -0.1557757
## Entropy_cooc.W.PET DAVE_cooc.W.PET DVAR_cooc.W.PET DENT_cooc.W.PET
## 1 1.4784780 0.9564701 0.5165571 1.450023
## 2 -0.5028837 -0.3253300 -0.1756997 -0.493205
## SAVE_cooc.W.PET SVAR_cooc.W.PET SENT_cooc.W.PET ASM_cooc.W.PET
## 1 0.9140050 0.4135667 1.5336398 0.5955603
## 2 -0.3108861 -0.1406689 -0.5216462 -0.2025715
## Contrast_cooc.W.PET Dissimilarity_cooc.W.PET Inv_diff_cooc.W.PET
## 1 0.5325478 0.9564701 1.2750883
## 2 -0.1811387 -0.3253300 -0.4337035
## Inv_diff_norm_cooc.W.PET IDM_cooc.W.PET IDM_norm_cooc.W.PET
## 1 1.6983343 1.044167 1.7048157
## 2 -0.5776647 -0.355159 -0.5798693
## Inv_var_cooc.W.PET Correlation_cooc.W.PET Autocorrelation_cooc.W.PET
## 1 1.1637708 1.1228422 0.4576739
## 2 -0.3958404 -0.3819191 -0.1556714
## Tendency_cooc.W.PET Shade_cooc.W.PET Prominence_cooc.W.PET IC1_d.W.PET
## 1 0.4135667 0.07642004 0.022900737 -0.26887955
## 2 -0.1406689 -0.02599321 -0.007789366 0.09145563
## IC2_d.W.PET Coarseness_vdif.W.PET Contrast_vdif.W.PET Busyness_vdif.W.PET
## 1 1.4455561 0.7071892 0.8252351 0.4153574
## 2 -0.4916858 -0.2405405 -0.2806922 -0.1412780
## Complexity_vdif.W.PET Strength_vdif.W.PET SRE_align.W.PET LRE_align.W.PET
## 1 0.2991726 0.4249851 1.697315 1.4801473
## 2 -0.1017594 -0.1445527 -0.577318 -0.5034515
## GLNU_align.W.PET RLNU_align.W.PET RP_align.W.PET LGRE_align.W.PET
## 1 0.4738278 0.4182280 1.6901986 0.8300003
## 2 -0.1611659 -0.1422544 -0.5748975 -0.2823130
## HGRE_align.W.PET LGSRE_align.W.PET HGSRE_align.W.PET LGHRE_align.W.PET
## 1 0.4630749 0.8904857 0.4557129 0.5563026
## 2 -0.1575085 -0.3028863 -0.1550044 -0.1892186
## HGLRE_align.W.PET GLNU_norm_align.W.PET RLNU_norm_align.W.PET
## 1 0.4921754 0.8494549 1.658483
## 2 -0.1674066 -0.2889302 -0.564110
## GLVAR_align.W.PET RLVAR_align.W.PET Entropy_align.W.PET SZSE.W.PET
## 1 0.4593218 0.5957178 1.5543465 1.6121174
## 2 -0.1562319 -0.2026251 -0.5286893 -0.5483392
## LZSE.W.PET LGLZE.W.PET HGLZE.W.PET SZLGE.W.PET SZHGE.W.PET LZLGE.W.PET
## 1 0.21517025 0.8709408 0.4690713 0.9938480 0.4481637 -0.004326372
## 2 -0.07318716 -0.2962384 -0.1595481 -0.3380435 -0.1524366 0.001471555
## LZHGE.W.PET GLNU_area.W.PET ZSNU.W.PET ZSP.W.PET GLNU_norm.W.PET
## 1 0.5263985 0.4910918 0.3971868 1.4948131 0.8826796
## 2 -0.1790471 -0.1670380 -0.1350976 -0.5084398 -0.3002311
## ZSNU_norm.W.PET GLVAR_area.W.PET ZSVAR.W.PET Entropy_area.W.PET Min_hist.ADC
## 1 1.4869647 0.4655759 0.06408427 1.6167770 0.5724098
## 2 -0.5057703 -0.1583592 -0.02179737 -0.5499242 -0.1946972
## Max_hist.ADC Mean_hist.ADC Variance_hist.ADC Standard_Deviation_hist.ADC
## 1 1.5075750 1.4864908 0.7599395 1.2359485
## 2 -0.5127806 -0.5056091 -0.2584828 -0.4203906
## Skewness_hist.ADC Kurtosis_hist.ADC Energy_hist.ADC Entropy_hist.ADC
## 1 0.3899909 0.4662845 0.7015053 1.6284344
## 2 -0.1326500 -0.1586002 -0.2386073 -0.5538893
## AUC_hist.ADC Volume.ADC X3D_surface.ADC ratio_3ds_vol.ADC
## 1 1.6655300 0.5687484 0.7349831 1.1042095
## 2 -0.5665068 -0.1934518 -0.2499942 -0.3755815
## ratio_3ds_vol_norm.ADC irregularity.ADC Compactness_v1.ADC Compactness_v2.ADC
## 1 1.6106322 1.6397737 1.1221987 1.3007130
## 2 -0.5478341 -0.5577462 -0.3817002 -0.4424194
## Spherical_disproportion.ADC Sphericity.ADC Asphericity.ADC Center_of_mass.ADC
## 1 1.6106322 1.6242350 1.1989866 0.5373920
## 2 -0.5478341 -0.5524609 -0.4078186 -0.1827864
## Max_3D_diam.ADC Major_axis_length.ADC Minor_axis_length.ADC
## 1 1.0866100 1.2316275 1.1312333
## 2 -0.3695952 -0.4189209 -0.3847732
## Least_axis_length.ADC Elongation.ADC Flatness.ADC Max_cooc.L.ADC
## 1 1.0417403 1.4824827 1.4052040 0.8250964
## 2 -0.3543334 -0.5042458 -0.4779606 -0.2806450
## Average_cooc.L.ADC Variance_cooc.L.ADC Entropy_cooc.L.ADC DAVE_cooc.L.ADC
## 1 1.456079 0.9533869 1.6827114 1.2819538
## 2 -0.495265 -0.3242813 -0.5723508 -0.4360387
## DVAR_cooc.L.ADC DENT_cooc.L.ADC SAVE_cooc.L.ADC SVAR_cooc.L.ADC
## 1 0.9295089 1.6521421 1.4558899 0.9317704
## 2 -0.3161595 -0.5619531 -0.4952006 -0.3169287
## SENT_cooc.L.ADC ASM_cooc.L.ADC Contrast_cooc.L.ADC Dissimilarity_cooc.L.ADC
## 1 1.2584756 0.7127202 0.8811662 1.2819538
## 2 -0.4280529 -0.2424218 -0.2997164 -0.4360387
## Inv_diff_cooc.L.ADC Inv_diff_norm_cooc.L.ADC IDM_cooc.L.ADC
## 1 1.5058302 1.7039344 1.3642322
## 2 -0.5121871 -0.5795695 -0.4640245
## IDM_norm_cooc.L.ADC Inv_var_cooc.L.ADC Correlation_cooc.L.ADC
## 1 1.7073272 1.379898 1.2216811
## 2 -0.5807235 -0.469353 -0.4155378
## Autocorrelation_.L.ADC Tendency_cooc.L.ADC Shade_.L.ADC Prominence_cooc.L.ADC
## 1 1.1050198 0.9317704 0.29259000 0.5515288
## 2 -0.3758571 -0.3169287 -0.09952041 -0.1875948
## IC1_.L.ADC IC2_.L.ADC Coarseness_vdif_.L.ADC Contrast_vdif_.L.ADC
## 1 -0.6732168 1.5121032 0.6939723 0.6587722
## 2 0.2289853 -0.5143208 -0.2360450 -0.2240722
## Busyness_vdif_.L.ADC Complexity_vdif_.L.ADC Strength_vdif_.L.ADC
## 1 0.6475886 1.2753146 0.4214397
## 2 -0.2202682 -0.4337805 -0.1433468
## SRE_align.L.ADC LRE_align.L.ADC GLNU_align.L.ADC RLNU_align.L.ADC
## 1 1.7052408 1.6811893 0.5682374 0.5910147
## 2 -0.5800139 -0.5718331 -0.1932780 -0.2010254
## RP_align.L.ADC LGRE_align.L.ADC HGRE_align.L.ADC LGSRE_align.L.ADC
## 1 1.7034645 0.7243458 1.2086645 0.7235521
## 2 -0.5794097 -0.2463761 -0.4111104 -0.2461061
## HGSRE_align.L.ADC LGHRE_align.L.ADC HGLRE_align.L.ADC GLNU_norm_align.L.ADC
## 1 1.2124123 0.7234431 1.1801466 1.2291014
## 2 -0.4123852 -0.2460691 -0.4014104 -0.4180617
## RLNU_norm_align.L.ADC GLVAR_align.L.ADC RLVAR_align.L.ADC Entropy_align.L.ADC
## 1 1.6955541 0.9930121 1.1385331 1.6982212
## 2 -0.5767191 -0.3377592 -0.3872562 -0.5776262
## SZSE.L.ADC LZSE.L.ADC LGLZE.L.ADC HGLZE.L.ADC SZLGE.L.ADC SZHGE.L.ADC
## 1 1.6968578 1.3430968 0.7262967 1.2295659 0.7219542 1.2399482
## 2 -0.5771625 -0.4568356 -0.2470397 -0.4182197 -0.2455627 -0.4217511
## LZLGE.L.ADC LZHGE.L.ADC GLNU_area.L.ADC ZSNU.L.ADC ZSP.L.ADC GLNU_norm.L.ADC
## 1 0.6651854 1.077189 0.5782984 0.5919629 1.6748354 1.2251432
## 2 -0.2262535 -0.366391 -0.1967001 -0.2013479 -0.5696719 -0.4167154
## ZSNU_norm.L.ADC GLVAR_area.L.ADC ZSVAR.L.ADC Entropy_area.L.ADC
## 1 1.6570978 1.012871 0.6758567 1.7010816
## 2 -0.5636387 -0.344514 -0.2298832 -0.5785992
## Max_cooc.H.ADC Average_cooc.H.ADC Variance_cooc.H.ADC Entropy_cooc.H.ADC
## 1 0.7039103 1.6967547 1.7053247 1.7011475
## 2 -0.2394253 -0.5771274 -0.5800424 -0.5786216
## DAVE_cooc.H.ADC DVAR_cooc.H.ADC DENT_cooc.H.ADC SAVE_cooc.H.ADC
## 1 1.5698813 1.4861394 1.7017575 1.6967573
## 2 -0.5339732 -0.5054896 -0.5788291 -0.5771283
## SVAR_cooc.H.ADC SENT_cooc.H.ADC ASM_cooc.H.ADC Contrast_cooc.H.ADC
## 1 1.6206816 1.6803084 0.6607170 1.3858879
## 2 -0.5512522 -0.5715335 -0.2247337 -0.4713904
## Dissimilarity_cooc.H.ADC Inv_diff_cooc.H.ADC Inv_diff_norm_cooc.H.ADC
## 1 1.5698813 1.5546888 1.7028145
## 2 -0.5339732 -0.5288057 -0.5791886
## IDM_cooc.H.ADC IDM_norm_cooc.H.ADC Inv_var_cooc.H.ADC Correlation_cooc.H.ADC
## 1 1.4136874 1.7054539 1.4364367 1.1993586
## 2 -0.4808461 -0.5800864 -0.4885839 -0.4079451
## Autocorrelation_cooc.H.ADC Tendency_cooc.H.ADC Shade_cooc.H.ADC
## 1 1.6722184 1.6206816 0.3887230
## 2 -0.5687818 -0.5512522 -0.1322187
## Prominence_cooc.H.ADC IC1_d.H.ADC IC2_d.H.ADC Coarseness_vdif.H.ADC
## 1 1.5404751 -0.5455177 1.5085932 0.6780216
## 2 -0.5239711 0.1855502 -0.5131269 -0.2306196
## Contrast_vdif.H.ADC Busyness_vdif.H.ADC Complexity_vdif.H.ADC
## 1 1.5316725 0.6153610 1.503704
## 2 -0.5209771 -0.2093065 -0.511464
## Strength_vdif.H.ADC SRE_align.H.ADC LRE_align.H.ADC GLNU_align.H.ADC
## 1 0.3677298 1.7071497 1.7038845 0.5901231
## 2 -0.1250782 -0.5806632 -0.5795526 -0.2007222
## RLNU_align.H.ADC RP_align.H.ADC LGRE_align.H.ADC HGRE_align.H.ADC
## 1 0.5924412 1.706814 1.0946139 1.7100780
## 2 -0.2015106 -0.580549 -0.3723177 -0.5816592
## LGSRE_align.H.ADC HGSRE_align.H.ADC LGHRE_align.H.ADC HGLRE_align.H.ADC
## 1 1.0760014 1.7093907 1.1710039 1.7053139
## 2 -0.3659869 -0.5814254 -0.3983006 -0.5800387
## GLNU_norm_align.H.ADC RLNU_norm_align.H.ADC GLVAR_align.H.ADC
## 1 0.9735389 1.7053279 1.7100152
## 2 -0.3311357 -0.5800435 -0.5816378
## RLVAR_align.H.ADC Entropy_align.H.ADC SZSE.H.ADC LZSE.H.ADC LGLZE.H.ADC
## 1 1.0687509 1.7093530 1.7049082 1.6336887 1.0589022
## 2 -0.3635207 -0.5814126 -0.5799008 -0.5556764 -0.3601708
## HGLZE.H.ADC SZLGE.H.ADC SZHGE.H.ADC LZLGE.H.ADC LZHGE.H.ADC GLNU_area.H.ADC
## 1 1.709075 1.0114862 1.7031396 1.0813161 1.5698347 0.5919958
## 2 -0.581318 -0.3440429 -0.5792992 -0.3677946 -0.5339574 -0.2013591
## ZSNU.H.ADC ZSP.H.ADC GLNU_norm.H.ADC ZSNU_norm.H.ADC GLVAR_area.H.ADC
## 1 0.5972096 1.7013318 0.9745507 1.692802 1.7072803
## 2 -0.2031325 -0.5786843 -0.3314798 -0.575783 -0.5807076
## ZSVAR.H.ADC Entropy_area.H.ADC Max_cooc.W.ADC Average_cooc.W.ADC
## 1 0.8431301 1.7066118 0.6868122 1.199285
## 2 -0.2867790 -0.5804802 -0.2336096 -0.407920
## Variance_cooc.W.ADC DAVE_cooc.W.ADC DVAR_cooc.W.ADC DENT_cooc.W.ADC
## 1 0.7283676 1.3033631 0.7679414 1.6768624
## 2 -0.2477441 -0.4433208 -0.2612045 -0.5703613
## SAVE_cooc.W.ADC SVAR_cooc.W.ADC SENT_cooc.W.ADC ASM_cooc.W.ADC
## 1 1.1909017 0.6843706 1.2023295 0.6601442
## 2 -0.4050686 -0.2327791 -0.4089556 -0.2245389
## Contrast_cooc.W.ADC Dissimilarity_cooc.W.ADC Inv_diff_cooc.W.ADC
## 1 0.7994120 1.3033631 1.3827605
## 2 -0.2719088 -0.4433208 -0.4703267
## Inv_diff_norm_cooc.W.ADC IDM_cooc.W.ADC IDM_norm_cooc.W.ADC
## 1 1.7038802 1.3112119 1.7073083
## 2 -0.5795511 -0.4459904 -0.5807171
## Inv_var_cooc.W.ADC Correlation_cooc.W.ADC Autocorrelation_cooc.W.ADC
## 1 1.3074526 1.2225367 0.8447953
## 2 -0.4447118 -0.4158288 -0.2873453
## Tendency_cooc.W.ADC Shade_cooc.W.ADC Prominence_cooc.W.ADC IC1_d.W.ADC
## 1 0.6843706 0.2567335 0.3775512 -0.6756692
## 2 -0.2327791 -0.0873243 -0.1284188 0.2298194
## IC2_d.W.ADC Coarseness_vdif.W.ADC Contrast_vdif.W.ADC Busyness_vdif.W.ADC
## 1 1.6012140 0.7114542 0.6249552 1.0116700
## 2 -0.5446306 -0.2419912 -0.2125698 -0.3441054
## Complexity_vdif.W.ADC Strength_vdif.W.ADC SRE_align.W.ADC LRE_align.W.ADC
## 1 0.6003182 0.5784705 1.7073214 1.7065667
## 2 -0.2041899 -0.1967587 -0.5807216 -0.5804649
## GLNU_align.W.ADC RLNU_align.W.ADC RP_align.W.ADC LGRE_align.W.ADC
## 1 0.6326468 0.5857336 1.7071535 0.6918953
## 2 -0.2151860 -0.1992291 -0.5806645 -0.2353386
## HGRE_align.W.ADC LGSRE_align.W.ADC HGSRE_align.W.ADC LGHRE_align.W.ADC
## 1 0.8626770 0.6918084 0.8616174 0.6894568
## 2 -0.2934276 -0.2353090 -0.2930672 -0.2345091
## HGLRE_align.W.ADC GLNU_norm_align.W.ADC RLNU_norm_align.W.ADC
## 1 0.866512 0.9154487 1.7063312
## 2 -0.294732 -0.3113771 -0.5803848
## GLVAR_align.W.ADC RLVAR_align.W.ADC Entropy_align.W.ADC SZSE.W.ADC LZSE.W.ADC
## 1 0.7640782 0.9834635 1.661714 1.7066974 1.6823970
## 2 -0.2598905 -0.3345114 -0.565209 -0.5805093 -0.5722439
## LGLZE.W.ADC HGLZE.W.ADC SZLGE.W.ADC SZHGE.W.ADC LZLGE.W.ADC LZHGE.W.ADC
## 1 0.6918923 0.8639228 0.6899145 0.8602645 0.6450074 0.8755515
## 2 -0.2353375 -0.2938513 -0.2346648 -0.2926070 -0.2193903 -0.2978066
## GLNU_area.W.ADC ZSNU.W.ADC ZSP.W.ADC GLNU_norm.W.ADC ZSNU_norm.W.ADC
## 1 0.6327545 0.5822861 1.7050925 0.9137899 1.699026
## 2 -0.2152226 -0.1980565 -0.5799634 -0.3108129 -0.577900
## GLVAR_area.W.ADC ZSVAR.W.ADC Entropy_area.W.ADC
## 1 0.7713592 1.0785430 1.672228
## 2 -0.2623671 -0.3668514 -0.568785
##
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 21058.70 23808.27
## (between_SS / total_SS = 46.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# visualise the final k-means clusters on the scaled data
fviz_cluster(object = final, data = final_m3)
Now that the K-Means clustering is done, we turn to another unsupervised learning technique: hierarchical clustering.
# Dissimilarity matrix (Euclidean distances between the scaled observations)
d <- dist(final_m3, method = "euclidean")
# Diagnostic plots for choosing the number of hierarchical clusters (hcut)
p1 <- fviz_nbclust(final_m3, FUNcluster = hcut, method = "wss",
                   k.max = 10) +
  ggtitle("(A) Elbow method")
p2 <- fviz_nbclust(final_m3, FUNcluster = hcut, method = "silhouette",
                   k.max = 10) +
  ggtitle("(B) Silhouette method")
# The gap statistic is estimated from simulated reference data sets, so seed
# the RNG first, as is done before every other stochastic step in this file
set.seed(123)
p3 <- fviz_nbclust(final_m3, FUNcluster = hcut, method = "gap_stat",
                   k.max = 10) +
  ggtitle("(C) Gap statistic")
# Display plots side by side
gridExtra::grid.arrange(p1, p2, p3, nrow = 1)
# Construct dendrogram for the given data using Ward's method
hc5 <- hclust(d, method = "ward.D2")
dend_plot <- fviz_dend(hc5)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
# the dendrogram object is stored as an attribute of the fviz_dend() plot
dend_data <- attr(dend_plot, "dendrogram")
# split the tree at height 2 into its $upper part and the $lower sub-trees
dend_cuts <- cut(x = dend_data, h = 2)
# inspect the pieces produced by the cut
dend_cuts
## $upper
## 'dendrogram' with 2 branches and 187 members total, at height 280.0608
##
## $lower
## $lower[[1]]
## 'dendrogram' leaf '169', at height 0
##
## $lower[[2]]
## 'dendrogram' leaf '155', at height 0
##
## $lower[[3]]
## 'dendrogram' leaf '175', at height 0
##
## $lower[[4]]
## 'dendrogram' leaf '148', at height 0
##
## $lower[[5]]
## 'dendrogram' leaf '188', at height 0
##
## $lower[[6]]
## 'dendrogram' leaf '154', at height 0
##
## $lower[[7]]
## 'dendrogram' leaf '158', at height 0
##
## $lower[[8]]
## 'dendrogram' leaf '176', at height 0
##
## $lower[[9]]
## 'dendrogram' leaf '183', at height 0
##
## $lower[[10]]
## 'dendrogram' leaf '184', at height 0
##
## $lower[[11]]
## 'dendrogram' leaf '193', at height 0
##
## $lower[[12]]
## 'dendrogram' leaf '194', at height 0
##
## $lower[[13]]
## 'dendrogram' with 2 branches and 2 members total, at height 1.053377
##
## $lower[[14]]
## 'dendrogram' leaf '191', at height 0
##
## $lower[[15]]
## 'dendrogram' leaf '196', at height 0
##
## $lower[[16]]
## 'dendrogram' leaf '189', at height 0
##
## $lower[[17]]
## 'dendrogram' leaf '190', at height 0
##
## $lower[[18]]
## 'dendrogram' leaf '167', at height 0
##
## $lower[[19]]
## 'dendrogram' leaf '159', at height 0
##
## $lower[[20]]
## 'dendrogram' leaf '171', at height 0
##
## $lower[[21]]
## 'dendrogram' leaf '180', at height 0
##
## $lower[[22]]
## 'dendrogram' leaf '157', at height 0
##
## $lower[[23]]
## 'dendrogram' leaf '150', at height 0
##
## $lower[[24]]
## 'dendrogram' leaf '149', at height 0
##
## $lower[[25]]
## 'dendrogram' leaf '151', at height 0
##
## $lower[[26]]
## 'dendrogram' leaf '192', at height 0
##
## $lower[[27]]
## 'dendrogram' leaf '170', at height 0
##
## $lower[[28]]
## 'dendrogram' leaf '177', at height 0
##
## $lower[[29]]
## 'dendrogram' leaf '182', at height 0
##
## $lower[[30]]
## 'dendrogram' leaf '153', at height 0
##
## $lower[[31]]
## 'dendrogram' leaf '164', at height 0
##
## $lower[[32]]
## 'dendrogram' leaf '162', at height 0
##
## $lower[[33]]
## 'dendrogram' leaf '152', at height 0
##
## $lower[[34]]
## 'dendrogram' leaf '160', at height 0
##
## $lower[[35]]
## 'dendrogram' leaf '163', at height 0
##
## $lower[[36]]
## 'dendrogram' leaf '156', at height 0
##
## $lower[[37]]
## 'dendrogram' leaf '179', at height 0
##
## $lower[[38]]
## 'dendrogram' leaf '181', at height 0
##
## $lower[[39]]
## 'dendrogram' leaf '165', at height 0
##
## $lower[[40]]
## 'dendrogram' leaf '174', at height 0
##
## $lower[[41]]
## 'dendrogram' leaf '185', at height 0
##
## $lower[[42]]
## 'dendrogram' leaf '186', at height 0
##
## $lower[[43]]
## 'dendrogram' leaf '173', at height 0
##
## $lower[[44]]
## 'dendrogram' leaf '187', at height 0
##
## $lower[[45]]
## 'dendrogram' leaf '172', at height 0
##
## $lower[[46]]
## 'dendrogram' leaf '168', at height 0
##
## $lower[[47]]
## 'dendrogram' leaf '178', at height 0
##
## $lower[[48]]
## 'dendrogram' leaf '161', at height 0
##
## $lower[[49]]
## 'dendrogram' leaf '166', at height 0
##
## $lower[[50]]
## 'dendrogram' leaf '126', at height 0
##
## $lower[[51]]
## 'dendrogram' leaf '128', at height 0
##
## $lower[[52]]
## 'dendrogram' leaf '125', at height 0
##
## $lower[[53]]
## 'dendrogram' with 2 branches and 2 members total, at height 1.598802
##
## $lower[[54]]
## 'dendrogram' leaf '146', at height 0
##
## $lower[[55]]
## 'dendrogram' leaf '147', at height 0
##
## $lower[[56]]
## 'dendrogram' with 2 branches and 3 members total, at height 1.798135
##
## $lower[[57]]
## 'dendrogram' leaf '132', at height 0
##
## $lower[[58]]
## 'dendrogram' leaf '135', at height 0
##
## $lower[[59]]
## 'dendrogram' leaf '142', at height 0
##
## $lower[[60]]
## 'dendrogram' leaf '137', at height 0
##
## $lower[[61]]
## 'dendrogram' leaf '141', at height 0
##
## $lower[[62]]
## 'dendrogram' leaf '123', at height 0
##
## $lower[[63]]
## 'dendrogram' leaf '130', at height 0
##
## $lower[[64]]
## 'dendrogram' leaf '127', at height 0
##
## $lower[[65]]
## 'dendrogram' leaf '144', at height 0
##
## $lower[[66]]
## 'dendrogram' leaf '120', at height 0
##
## $lower[[67]]
## 'dendrogram' leaf '121', at height 0
##
## $lower[[68]]
## 'dendrogram' leaf '46', at height 0
##
## $lower[[69]]
## 'dendrogram' leaf '119', at height 0
##
## $lower[[70]]
## 'dendrogram' leaf '17', at height 0
##
## $lower[[71]]
## 'dendrogram' leaf '117', at height 0
##
## $lower[[72]]
## 'dendrogram' leaf '88', at height 0
##
## $lower[[73]]
## 'dendrogram' leaf '114', at height 0
##
## $lower[[74]]
## 'dendrogram' leaf '97', at height 0
##
## $lower[[75]]
## 'dendrogram' leaf '90', at height 0
##
## $lower[[76]]
## 'dendrogram' leaf '91', at height 0
##
## $lower[[77]]
## 'dendrogram' with 2 branches and 2 members total, at height 0.385378
##
## $lower[[78]]
## 'dendrogram' with 2 branches and 2 members total, at height 0.7506009
##
## $lower[[79]]
## 'dendrogram' leaf '64', at height 0
##
## $lower[[80]]
## 'dendrogram' leaf '75', at height 0
##
## $lower[[81]]
## 'dendrogram' leaf '48', at height 0
##
## $lower[[82]]
## 'dendrogram' leaf '66', at height 0
##
## $lower[[83]]
## 'dendrogram' leaf '68', at height 0
##
## $lower[[84]]
## 'dendrogram' leaf '11', at height 0
##
## $lower[[85]]
## 'dendrogram' leaf '118', at height 0
##
## $lower[[86]]
## 'dendrogram' leaf '21', at height 0
##
## $lower[[87]]
## 'dendrogram' leaf '24', at height 0
##
## $lower[[88]]
## 'dendrogram' leaf '19', at height 0
##
## $lower[[89]]
## 'dendrogram' leaf '39', at height 0
##
## $lower[[90]]
## 'dendrogram' leaf '32', at height 0
##
## $lower[[91]]
## 'dendrogram' leaf '82', at height 0
##
## $lower[[92]]
## 'dendrogram' leaf '4', at height 0
##
## $lower[[93]]
## 'dendrogram' leaf '18', at height 0
##
## $lower[[94]]
## 'dendrogram' leaf '61', at height 0
##
## $lower[[95]]
## 'dendrogram' leaf '20', at height 0
##
## $lower[[96]]
## 'dendrogram' leaf '33', at height 0
##
## $lower[[97]]
## 'dendrogram' leaf '60', at height 0
##
## $lower[[98]]
## 'dendrogram' leaf '15', at height 0
##
## $lower[[99]]
## 'dendrogram' leaf '41', at height 0
##
## $lower[[100]]
## 'dendrogram' leaf '72', at height 0
##
## $lower[[101]]
## 'dendrogram' leaf '81', at height 0
##
## $lower[[102]]
## 'dendrogram' with 2 branches and 2 members total, at height 0.6585336
##
## $lower[[103]]
## 'dendrogram' with 2 branches and 3 members total, at height 1.235516
##
## $lower[[104]]
## 'dendrogram' leaf '70', at height 0
##
## $lower[[105]]
## 'dendrogram' leaf '112', at height 0
##
## $lower[[106]]
## 'dendrogram' leaf '2', at height 0
##
## $lower[[107]]
## 'dendrogram' leaf '7', at height 0
##
## $lower[[108]]
## 'dendrogram' leaf '27', at height 0
##
## $lower[[109]]
## 'dendrogram' leaf '38', at height 0
##
## $lower[[110]]
## 'dendrogram' leaf '1', at height 0
##
## $lower[[111]]
## 'dendrogram' leaf '28', at height 0
##
## $lower[[112]]
## 'dendrogram' leaf '89', at height 0
##
## $lower[[113]]
## 'dendrogram' leaf '34', at height 0
##
## $lower[[114]]
## 'dendrogram' leaf '35', at height 0
##
## $lower[[115]]
## 'dendrogram' leaf '50', at height 0
##
## $lower[[116]]
## 'dendrogram' leaf '51', at height 0
##
## $lower[[117]]
## 'dendrogram' leaf '52', at height 0
##
## $lower[[118]]
## 'dendrogram' leaf '58', at height 0
##
## $lower[[119]]
## 'dendrogram' leaf '107', at height 0
##
## $lower[[120]]
## 'dendrogram' leaf '13', at height 0
##
## $lower[[121]]
## 'dendrogram' leaf '111', at height 0
##
## $lower[[122]]
## 'dendrogram' leaf '83', at height 0
##
## $lower[[123]]
## 'dendrogram' leaf '100', at height 0
##
## $lower[[124]]
## 'dendrogram' leaf '65', at height 0
##
## $lower[[125]]
## 'dendrogram' leaf '42', at height 0
##
## $lower[[126]]
## 'dendrogram' leaf '54', at height 0
##
## $lower[[127]]
## 'dendrogram' leaf '71', at height 0
##
## $lower[[128]]
## 'dendrogram' leaf '78', at height 0
##
## $lower[[129]]
## 'dendrogram' leaf '93', at height 0
##
## $lower[[130]]
## 'dendrogram' leaf '40', at height 0
##
## $lower[[131]]
## 'dendrogram' leaf '23', at height 0
##
## $lower[[132]]
## 'dendrogram' leaf '30', at height 0
##
## $lower[[133]]
## 'dendrogram' leaf '45', at height 0
##
## $lower[[134]]
## 'dendrogram' leaf '106', at height 0
##
## $lower[[135]]
## 'dendrogram' leaf '136', at height 0
##
## $lower[[136]]
## 'dendrogram' leaf '85', at height 0
##
## $lower[[137]]
## 'dendrogram' with 2 branches and 2 members total, at height 0.5735473
##
## $lower[[138]]
## 'dendrogram' leaf '129', at height 0
##
## $lower[[139]]
## 'dendrogram' leaf '140', at height 0
##
## $lower[[140]]
## 'dendrogram' leaf '131', at height 0
##
## $lower[[141]]
## 'dendrogram' leaf '145', at height 0
##
## $lower[[142]]
## 'dendrogram' leaf '138', at height 0
##
## $lower[[143]]
## 'dendrogram' leaf '139', at height 0
##
## $lower[[144]]
## 'dendrogram' leaf '22', at height 0
##
## $lower[[145]]
## 'dendrogram' leaf '49', at height 0
##
## $lower[[146]]
## 'dendrogram' leaf '59', at height 0
##
## $lower[[147]]
## 'dendrogram' leaf '31', at height 0
##
## $lower[[148]]
## 'dendrogram' leaf '44', at height 0
##
## $lower[[149]]
## 'dendrogram' leaf '56', at height 0
##
## $lower[[150]]
## 'dendrogram' leaf '77', at height 0
##
## $lower[[151]]
## 'dendrogram' leaf '76', at height 0
##
## $lower[[152]]
## 'dendrogram' leaf '115', at height 0
##
## $lower[[153]]
## 'dendrogram' leaf '14', at height 0
##
## $lower[[154]]
## 'dendrogram' leaf '36', at height 0
##
## $lower[[155]]
## 'dendrogram' leaf '110', at height 0
##
## $lower[[156]]
## 'dendrogram' leaf '55', at height 0
##
## $lower[[157]]
## 'dendrogram' leaf '73', at height 0
##
## $lower[[158]]
## 'dendrogram' leaf '74', at height 0
##
## $lower[[159]]
## 'dendrogram' leaf '69', at height 0
##
## $lower[[160]]
## 'dendrogram' leaf '79', at height 0
##
## $lower[[161]]
## 'dendrogram' leaf '67', at height 0
##
## $lower[[162]]
## 'dendrogram' leaf '62', at height 0
##
## $lower[[163]]
## 'dendrogram' leaf '99', at height 0
##
## $lower[[164]]
## 'dendrogram' leaf '9', at height 0
##
## $lower[[165]]
## 'dendrogram' leaf '25', at height 0
##
## $lower[[166]]
## 'dendrogram' leaf '109', at height 0
##
## $lower[[167]]
## 'dendrogram' leaf '57', at height 0
##
## $lower[[168]]
## 'dendrogram' leaf '80', at height 0
##
## $lower[[169]]
## 'dendrogram' leaf '8', at height 0
##
## $lower[[170]]
## 'dendrogram' leaf '16', at height 0
##
## $lower[[171]]
## 'dendrogram' leaf '86', at height 0
##
## $lower[[172]]
## 'dendrogram' leaf '87', at height 0
##
## $lower[[173]]
## 'dendrogram' leaf '63', at height 0
##
## $lower[[174]]
## 'dendrogram' leaf '53', at height 0
##
## $lower[[175]]
## 'dendrogram' leaf '5', at height 0
##
## $lower[[176]]
## 'dendrogram' leaf '26', at height 0
##
## $lower[[177]]
## 'dendrogram' leaf '43', at height 0
##
## $lower[[178]]
## 'dendrogram' leaf '108', at height 0
##
## $lower[[179]]
## 'dendrogram' leaf '6', at height 0
##
## $lower[[180]]
## 'dendrogram' leaf '47', at height 0
##
## $lower[[181]]
## 'dendrogram' leaf '3', at height 0
##
## $lower[[182]]
## 'dendrogram' leaf '10', at height 0
##
## $lower[[183]]
## 'dendrogram' leaf '12', at height 0
##
## $lower[[184]]
## 'dendrogram' leaf '29', at height 0
##
## $lower[[185]]
## 'dendrogram' leaf '84', at height 0
##
## $lower[[186]]
## 'dendrogram' leaf '37', at height 0
##
## $lower[[187]]
## 'dendrogram' leaf '95', at height 0
#fviz_dend(dend_cuts$lower[[3]])
# Agglomerative hierarchical clustering with Ward's linkage ("ward.D2")
hc5 <- hclust(d, method = "ward.D2")
# Split the tree into 2 groups (k = 2)
sub_grp <- cutree(tree = hc5, k = 2)
# Count the observations assigned to each group
table(sub_grp)
## sub_grp
## 1 2
## 147 50
# Draw the complete dendrogram horizontally, with the k = 2 clusters
# marked by filled rectangles; "jco" is used for branch and border colors
fviz_dend(
  hc5,
  k = 2,
  k_colors = "jco",
  rect = TRUE,
  rect_border = "jco",
  rect_fill = TRUE,
  horiz = TRUE,
  cex = 0.1
)
# Build the full dendrogram plot and pull the underlying dendrogram
# object out of its "dendrogram" attribute
dend_plot <- fviz_dend(hc5)
dend_data <- attr(dend_plot, "dendrogram")
# Cut the dendrogram at height 70.5; `$lower` holds the sub-trees
# found below the cut
dend_cuts <- cut(dend_data, h = 70.5)
# Plot the first sub-tree twice: default layout and circular layout
p1 <- fviz_dend(dend_cuts$lower[[1]])
p2 <- fviz_dend(dend_cuts$lower[[1]], type = "circular")
# Show both layouts side by side in a single row
gridExtra::grid.arrange(p1, p2, nrow = 1)
There is one problem with the model-based clustering techniques.
Because the data set has too many columns, the model plots cannot be
generated correctly: plot.new() always fails with a
"figure margins too large" error,
even if we set the margins to c(1,1,1,1).
Therefore, to apply a GMM with 3 components and plot the results
correctly, I chose only four columns from the scaled data frame:
Failure, Entropy_cooc.W.ADC,
Entropy_hist.PET and Entropy_cooc.L.PET.
# Apply GMM model with 3 components to four selected columns of the
# scaled data (a small column set keeps the mclust plots drawable)
df_failure <- final_m3 %>%
  select(
    Failure,
    Entropy_cooc.W.ADC,
    Entropy_hist.PET,
    Entropy_cooc.L.PET
  )
arrest_mc <- Mclust(df_failure, G = 3)
# Plot results with tight margins. par() returns the previous settings,
# which are restored afterwards so the margin change does not leak into
# later base-graphics plots.
old_par <- par(mar = c(1, 1, 1, 1))
plot(arrest_mc, what = "density")
plot(arrest_mc, what = "uncertainty")
par(old_par)
# Observations with high uncertainty
sort(arrest_mc$uncertainty, decreasing = TRUE) %>% head()
## 116 101 79 43 4 139
## 0.4391194 0.3439769 0.3436707 0.3222617 0.3128399 0.3021089
summary(arrest_mc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 3 components:
##
## log-likelihood n df BIC ICL
## -627.5309 197 26 -1392.425 -1407.834
##
## Clustering table:
## 1 2 3
## 89 58 50
# Refit without fixing G, leaving the choice of component count and
# covariance model to Mclust; the summary reports the selected model
arrest_optimal_mc <- Mclust(data = df_failure)
summary(arrest_optimal_mc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 5 components:
##
## log-likelihood n df BIC ICL
## -578.4222 197 44 -1389.305 -1422.034
##
## Clustering table:
## 1 2 3 4 5
## 43 33 24 47 50
# Legend placement passed to the BIC plot
legend_args <- list(x = "bottomright", ncol = 5)
# Diagnostic plots for the automatically selected model
plot(arrest_optimal_mc, what = "BIC", legendArgs = legend_args)
plot(arrest_optimal_mc, what = "classification")
plot(arrest_optimal_mc, what = "uncertainty")
# Repeat the model search over 1 to 20 mixture components
df_mc <- Mclust(df_failure, G = 1:20)
summary(df_mc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 5 components:
##
## log-likelihood n df BIC ICL
## -578.4222 197 44 -1389.305 -1422.034
##
## Clustering table:
## 1 2 3 4 5
## 43 33 24 47 50
# BIC of the candidate models from the 1:20 component search
plot(
  df_mc,
  what = "BIC",
  legendArgs = list(x = "bottomright", ncol = 5)
)
# Membership probability matrix from the fitted model
# (presumably one row per observation and one column per component --
# see the Mclust documentation for `z`)
probabilities <- df_mc$z
# Reshape to long format: one row per (observation, component) pair.
# pivot_longer() replaces the superseded gather(); the histogram below
# does not depend on row order, so the plot is unchanged.
probabilities <- probabilities %>%
  as.data.frame() %>%
  mutate(id = row_number()) %>%
  tidyr::pivot_longer(
    cols = -id,
    names_to = "cluster",
    values_to = "probability"
  )
# Distribution of membership probabilities, one panel per component
ggplot(probabilities, aes(probability)) +
  geom_histogram() +
  facet_wrap(~ cluster, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Per-observation cluster assignment and classification uncertainty.
# seq_len() is used instead of 1:nrow() so an empty input cannot yield
# the c(1, 0) sequence.
uncertainty <- data.frame(
  id = seq_len(nrow(df_failure)),
  cluster = df_mc$classification,
  uncertainty = df_mc$uncertainty
)
# Keep only observations whose uncertainty exceeds 0.0001 and plot them
# ordered by uncertainty, one panel per cluster. The filter is row-wise,
# so no group_by() is needed (and no grouped data is handed to ggplot).
uncertainty %>%
  filter(uncertainty > 0.0001) %>%
  ggplot(aes(uncertainty, reorder(id, uncertainty))) +
  geom_point() +
  facet_wrap(~ cluster, scales = "free_y", nrow = 1)
# Standardized feature values of the observations assigned to cluster 2.
# scale() returns a matrix, hence the conversion back to a data frame
# before the cluster labels are attached.
cluster2 <- df_failure %>%
  scale() %>%
  as.data.frame() %>%
  mutate(cluster = df_mc$classification) %>%
  filter(cluster == 2) %>%
  select(-cluster)
# Mean standardized value of each feature within cluster 2.
# pivot_longer() replaces the superseded gather(); the columns are
# features here, so the names and axis label say "feature" rather than
# the product/consumption wording of the original template.
cluster2 %>%
  tidyr::pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "std_value"
  ) %>%
  group_by(feature) %>%
  summarize(avg = mean(std_value)) %>%
  ggplot(aes(avg, reorder(feature, avg))) +
  geom_point() +
  labs(x = "Average standardized feature value", y = NULL)
However, if I run the model-based clustering on the
entire data set, the graphs cannot be plotted, because every
attempt returns the "figure margins too large" error.
Therefore, without plotting the density,
uncertainty, and classification graphs, one
should be able to reach the following results:
# Apply GMM model with 3 components to the full scaled data set
arrest_final_mc <- Mclust(final_m3, G = 3)
# Plots are left disabled for the full data set
# par(mar = c(1, 1, 1, 1))
# plot(arrest_final_mc, what = "density")
# plot(arrest_final_mc, what = "uncertainty")
# Observations with high uncertainty
sort(arrest_final_mc$uncertainty, decreasing = TRUE) %>% head()
## 100 93 48 4 1 2
## 3.485515e-07 5.757084e-11 5.693224e-13 8.881784e-16 0.000000e+00 0.000000e+00
# Summarize the full-data model. This call previously summarized
# `arrest_mc` (the four-column model) by mistake.
# NOTE(review): the printed summary that follows was produced by the old
# call and must be regenerated.
summary(arrest_final_mc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 3 components:
##
## log-likelihood n df BIC ICL
## -627.5309 197 26 -1392.425 -1407.834
##
## Clustering table:
## 1 2 3
## 89 58 50
# Refit on the full scaled data without fixing G, leaving the choice of
# component count and covariance model to Mclust
arrest_optimal_final <- Mclust(data = final_m3)
summary(arrest_optimal_final)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 7 components:
##
## log-likelihood n df BIC ICL
## -33791.44 197 6012 -99345.5 -99345.5
##
## Clustering table:
## 1 2 3 4 5 6 7
## 94 38 3 12 9 34 7
# Legend placement for the (currently disabled) BIC plot
legend_args <- list(x = "bottomright", ncol = 5)
# Diagnostic plots are left disabled for the full data set
# plot(arrest_optimal_final, what = "BIC", legendArgs = legend_args)
# plot(arrest_optimal_final, what = "classification")
# plot(arrest_optimal_final, what = "uncertainty")
# Repeat the model search over 1 to 20 mixture components
df_finalmc <- Mclust(final_m3, G = 1:20)
summary(df_finalmc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VEI (diagonal, equal shape) model with 10 components:
##
## log-likelihood n df BIC ICL
## -36831.86 197 4737 -98690.26 -98690.27
##
## Clustering table:
## 1 2 3 4 5 6 7 8 9 10
## 60 44 26 3 2 12 10 12 11 17
# BIC plot left disabled for the full data set
# plot(
#   df_finalmc,
#   what = "BIC",
#   legendArgs = list(x = "bottomright", ncol = 5)
# )
# Membership probability matrix from the full-data model
# (presumably one row per observation and one column per component --
# see the Mclust documentation for `z`)
probabilities <- df_finalmc$z
# Reshape to long format: one row per (observation, component) pair.
# pivot_longer() replaces the superseded gather(); the histogram below
# does not depend on row order, so the plot is unchanged.
probabilities <- probabilities %>%
  as.data.frame() %>%
  mutate(id = row_number()) %>%
  tidyr::pivot_longer(
    cols = -id,
    names_to = "cluster",
    values_to = "probability"
  )
# Distribution of membership probabilities, one panel per component
ggplot(probabilities, aes(probability)) +
  geom_histogram() +
  facet_wrap(~ cluster, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Per-observation cluster assignment and classification uncertainty for
# the full-data model. seq_len() is used instead of 1:nrow() so an empty
# input cannot yield the c(1, 0) sequence.
uncertainty <- data.frame(
  id = seq_len(nrow(final_m3)),
  cluster = df_finalmc$classification,
  uncertainty = df_finalmc$uncertainty
)
# Keep only observations whose uncertainty exceeds 0.0001 and plot them
# ordered by uncertainty, one panel per cluster. The filter is row-wise,
# so no group_by() is needed (and no grouped data is handed to ggplot).
uncertainty %>%
  filter(uncertainty > 0.0001) %>%
  ggplot(aes(uncertainty, reorder(id, uncertainty))) +
  geom_point() +
  facet_wrap(~ cluster, scales = "free_y", nrow = 1)
# Standardized feature values of the observations that the full-data
# model assigns to cluster 2. scale() returns a matrix, hence the
# conversion back to a data frame before the labels are attached.
cluster2 <- final_m3 %>%
  scale() %>%
  as.data.frame() %>%
  mutate(cluster = df_finalmc$classification) %>%
  filter(cluster == 2) %>%
  select(-cluster)
# Mean standardized value of each feature within cluster 2.
# pivot_longer() replaces the superseded gather(); the columns are
# features here, so the names and axis label say "feature" rather than
# the product/consumption wording of the original template.
cluster2 %>%
  tidyr::pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "std_value"
  ) %>%
  group_by(feature) %>%
  summarize(avg = mean(std_value)) %>%
  ggplot(aes(avg, reorder(feature, avg))) +
  geom_point() +
  labs(x = "Average standardized feature value", y = NULL)